FIXME:

source("utils.R")
source("gamm4_utils.R")
load_all_pkgs()
graphics_setup()
library('pander')  ## for tables
sapply(pkgList,function(x) format(packageVersion(x)))
##          lme4         gamm4         bbmle   broom.mixed          brms 
## "1.1.21.9002"       "0.2.5"    "1.0.23.1"  "0.2.4.9000"      "2.11.1" 
##    data.table       lattice     gridExtra       ggplot2       viridis 
##      "1.12.8"     "0.20.38"         "2.3"       "3.2.1"       "0.5.1" 
##        plotly       cowplot      ggstance          plyr         dplyr 
##       "4.9.1"       "1.0.0"       "0.3.3"       "1.8.5"       "0.8.4" 
##         tidyr        tibble         remef        r2glmm        raster 
##       "1.0.2"       "2.1.3"       "1.0.7"       "0.1.2"      "3.0.12" 
##         rgdal        fields       plotrix            sp    colorspace 
##       "1.4.8"        "10.3"       "3.7.7"       "1.3.2"       "1.4.1"
best_models <- readRDS("bestmodels_gamm4.rds")
ecoreg <- readRDS("ecoreg.rds")

Plots of the original variables:

taxon_names <- c("mbirds","mamph","mmamm")
respvars <- paste0(taxon_names,"_log")
predvars <- c("NPP_log_sc","Feat_log_sc","NPP_cv_sc","Feat_cv_sc")
## plot raw values of diversity for a given taxon/predictor
plotfun(model=NULL, respvar="mbirds_log",
        xvar="NPP_log_sc",
        backtrans=TRUE,log="xy")

## generate all combinations
orig_plots <- list()
for (i in seq_along(taxon_names)) {
    orig_plots[[taxon_names[i]]] <- list()
    for (pp in predvars) {
      orig_plots[[taxon_names[i]]][[pp]] <-
        plotfun(model=NULL,respvar=respvars[i],
                xvar=pp,data=ecoreg,
                backtrans=TRUE,log="xy")+
                theme(legend.position="none")
    }
}
plot_grid(plotlist=orig_plots[["mbirds"]],align="hv",nrow=2)

Plots of the transformed variables (log and scaled)

## names are the names of the variable in the data set;
##  the character strings are the names to put on the axis
## this time using log-scaled etc.
pred_names <- c(NPP_mean="NPP g*C/m2*year",
                Feat_mean="Feat (% of NPP)",
                NPP_cv_inter="CV of NPP",
                Feat_cv_inter = "CV of Feat")
sc_taxon_names <- paste(c("Birds","Amphibians","Mammals"),
                      "N sp/km2")
names(sc_taxon_names) <- paste0(c("mbirds","mamph","mmamm"),"_log")
sc_pred_names <- c(NPP_log_sc="log NPP g*C/m2*year",
                   Feat_log_sc="log Feat (% of NPP)",
                   NPP_cv_sc="scaled CV of NPP",
                   Feat_cv_sc = "scaled CV of Feat")
do_plots_1 <- function(taxon="mbirds",pred="NPP_mean",
                       s_names=taxon_names,
                       p_names=pred_names) {
    ggplot(ecoreg,aes_string(x=pred,y=taxon,colour="biome")) +
        geom_point() +
        theme(legend.position="none") +
        labs(y=s_names[taxon], x=p_names[pred])
}
## do all combinations
sc_plots <- list()
for (tt in names(sc_taxon_names)) {
    sc_plots[[tt]] <- list()
    for (pp in names(sc_pred_names)) {
        sc_plots[[tt]][[pp]] <- do_plots_1(tt,pp,
                                           p_names=sc_pred_names,
                                           s_names=sc_taxon_names)
    }
}

## for a given taxon, draw unscaled plots in row 1 and scaled plots in row 2
both_plots <- function(taxon) {
    plot_grid(plotlist=c(orig_plots[[taxon]],
                         sc_plots[[paste0(taxon,"_log")]]),
              nrow=2,
              align="hv")
}

Plots of residuals

I generated several “lists” of plots (using plotfun) with the lapply function following Ben’s code. In each case, I changed the xvar and auxvar to plot the 4 top most variables for each taxa (aside from NPP)

taxon <- "mbirds_log"
plotfun(best_models[[taxon]],
        respvar=taxon,
        xvar='Feat_log_sc',auxvar=NULL,data=ecoreg,backtrans=TRUE,log="xy")

## partial residuals plots
remef_plot <- function(taxon="mbirds_log",xvar="NPP_log",
                       auxvar=NULL,title=NULL) {
    m <- best_models[[taxon]]
    if (is.null(title)) {
        title <- if (is.null(auxvar)) xvar else {
             paste(xvar,auxvar,sep=":")                                     
                                              }
    }
    pp <- (plotfun(m,xvar=xvar,respvar="partial_res",
                   auxvar=auxvar,data=ecoreg,
                   backtrans=TRUE,log="xy") 
      + theme(legend.position="none")
      + ggtitle(title)
    )
    return(pp)
}
## rp1: NPP_log
## rp2: NPP_log:Feat_cv_sc
## rp3: NPP_log:Feat_log
## rp4: Feat_log:NPP_cv_sc
## rp5: Feat_log
## rp6: NPP_cv_sc
## rp7: Feat_cv_sc
rem_predvars <- list("NPP_log_sc",
                     "NPP_log_sc",
                     "NPP_log_sc",
                     "Feat_log_sc",
                     "Feat_log_sc",
                     "NPP_cv_sc",
                     "Feat_cv_sc")
rem_auxvars <- list(NULL,"Feat_cv_sc","Feat_log_sc",
                    "NPP_cv_sc",NULL,NULL,NULL)
## construct all combinations of partial residuals for each taxon
remef_plots <- list()
for (tt in names(sc_taxon_names)) { ## for each taxon
    remef_plots[[tt]] <- list()
    for (pp in seq_along(rem_predvars)) { ## for each predictor variable
      ## cat(tt,pp,"\n")
      ## cat(tt," ",pp,"\n")
      ## generate and save the partial residuals plot
      remef_plots[[tt]][[pp]] <- remef_plot(tt,xvar=rem_predvars[[pp]],
                                            auxvar=rem_auxvars[[pp]])
      ## print(remef_plots[[tt]][[pp]])
    }
}

Graphical outputs

Graphic example to see the color of each biome in the plots below

ggplot(ecoreg,aes(NPP_mean,mbirds,colour=biome)) + geom_point() + labs(x = "NPP (g*C/m2*year)", y = "Birds (N sp/km2)")

Amphibians

Original + transformed values (xy plot)

both_plots("mamph")

Top 4 effects

do.call(grid.arrange,
        c(remef_plots[["mamph_log"]][c(2,3,4,5)],
          ## NPP_log:Feat_csv, NPP_log:Feat_log, Feat_log:NPP_cv_sv, Feat_log
          list(ncol=2)))

Birds

Original + transformed values

both_plots("mbirds")

Top 4 effects

do.call(grid.arrange,
        c(remef_plots[["mbirds_log"]][c(5,2,6,7)],
               list(ncol=2)))

Mammals

Original + transformed values

both_plots("mmamm")

Top 4 effects

do.call(grid.arrange,
        c(remef_plots[["mmamm_log"]][c(7,5,2,4)],
          list(ncol=2)))

questions

  • why do fitted model lines look bad for amphibians? NA values/different mean? Amphibians are indeed the only data set with any NA values …
nna <- colSums(is.na(ecoreg))
nna[nna>0]
##     plants      mamph plants_log  mamph_log 
##          3          3          3          3
ee <- subset(ecoreg, select=c(Area,mamph_log,mbirds_log,
                              NPP_mean,NPP_cv_inter,Feat_mean,Feat_cv_inter))
predvars <- c("NPP_mean","NPP_cv_inter","Feat_mean","Feat_cv_inter","Area")
rbind(amph=colMeans(ee[!is.na(ee$mamph_log),predvars]),
      birds=colMeans(ee[!is.na(ee$mbirds_log),predvars]))
##       NPP_mean NPP_cv_inter  Feat_mean Feat_cv_inter         Area
## amph  571.1891   0.08425857 0.01902953      1.955229 201070382188
## birds 568.8940   0.08517630 0.01894535      1.962958 200694366936

Means are indeed slightly different, but is this enough to drive the observed difference?